# -*- coding: utf-8 -*-

import sys
from tqdm import tqdm
from csv import DictReader
import os, math, itertools
from numpy.random import normal
import datetime
import statsmodels, csv
import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
from pandas.api.types import CategoricalDtype
from scipy.stats import norm
import subprocess

if 'darwin' in sys.platform:
    print('Running \'caffeinate\' on MacOSX to prevent the system from sleeping')
    subprocess.Popen('caffeinate')

########################################################################


def mean(numbers):
    return float(sum(numbers)) / max(len(numbers), 1)

def ols_cls1(dat_t, RHS_vars):
	if 'latentmeanL2'  in RHS_vars:
		raise Exception('LatentmeanL2 cannot be among the controls for multiple imputation.')
	if len(RHS_vars) >0:form = 'latentmean' + ' ~ cpFlL1 + cpL1 +' + ' + '.join(RHS_vars)
	else:form = 'latentmean' + ' ~ cpFlL1 + cpL1 '
	allVars = ['latentmean', 'ccode', 'cpL1', 'cpFlL1', 'ccode'] + RHS_vars
	dat = dat_t[list(set(allVars))].dropna()
	tsts = dat['ccode'].astype(int)
	dat['ccode'] = pd.Categorical(tsts)
	results = smf.ols(form, data=dat).fit(cov_type='cluster', cov_kwds={'groups': dat['ccode']})
	cpFlL1_est = results.params['cpFlL1']
	cpL1_est = results.params['cpL1']
	cpFlL1_se = results.bse['cpFlL1']
	cpL1_se = results.bse['cpL1']
	return ({'Est': {'cpFlL1': cpFlL1_est, 'cpL1': cpL1_est}, 'Se': {'cpFlL1': cpFlL1_se, 'cpL1': cpL1_se}})

def ols_cls_MI(dat_t, RHS_vars):
	if 'latentmeanL2'  in RHS_vars:
		raise Exception('LatentmeanL2 cannot be among the controls for multiple imputation.')
	if RHS_vars != []:
		form = 'latentmean' + ' ~ cpFlL1 + cpL1 + latentmean_pt1 + ' + ' + '.join(RHS_vars)
	elif RHS_vars == []:
		form = 'latentmean ~ cpFlL1 + cpL1 + latentmean_pt1 '

	cpFl_est = []
	cp_est = []
	cpFl_se = []
	cp_se = []
	for i in range(1, 11):
		dat_t['latentmean_pt1'] = dat_t['latentmean_post_' + str(i) + '_L2']
		allVars = ['latentmean', 'ccode', 'cpL1', 'cpFlL1', 'latentmean_pt1', 'ccode'] + RHS_vars
		dat = dat_t[list(set(allVars))].dropna()
		tsts = dat['ccode'].astype(int)
		dat['ccode'] = pd.Categorical(tsts)
		results = smf.ols(form, data=dat).fit(cov_type='cluster', cov_kwds={'groups': dat['ccode']})
		cpFl_est.append(results.params['cpFlL1'])
		cp_est.append(results.params['cpL1'])
		cpFl_se.append(results.bse['cpFlL1'])
		cp_se.append(results.bse['cpL1'])

	q_cpFl = mean(cpFl_est)
	q_cp = mean(cp_est)

	smplVar_cpFl = sum([(x - q_cpFl)**2. for x in cpFl_est])/9.
	smplVar_cp = sum([(x - q_cp)**2. for x in cp_est])/9.

	avgVar_cpFl = mean([x**2. for x in cpFl_se])
	avgVar_cp = mean([x**2. for x in cp_se])

	SE_sqrd_cpFl = avgVar_cpFl + smplVar_cpFl*(1. + 1./10.)
	SE_sqrd_cp = avgVar_cp + smplVar_cp*(1. + 1./10.)

	return ({'Est': {'cpFlL1': q_cpFl, 'cpL1': q_cp}, 'Se': {'cpFlL1': math.sqrt(SE_sqrd_cpFl), 'cpL1': math.sqrt(SE_sqrd_cp)}})

def ols_cls_interac_MI(dat_t, RHS_vars):
	if 'latentmeanL2'  in RHS_vars:
		raise Exception('LatentmeanL2 cannot be among the controls for multiple imputation.')
	if RHS_vars != []:
		form = 'latentmean' + ' ~ cpFlL1 + cpL1 + cpFl_latent_pt + cp_latent_pt + latentmean_pt1 + ' + ' + '.join(RHS_vars)
	elif RHS_vars == []:
		form = 'latentmean ~ cpFlL1 + cpL1 + cpFl_latent_pt + cp_latent_pt + latentmean_pt1 '

	cpFl_est = []
	cpFl_latent_est = []
	cp_est = []
	cp_latent_est = []
	cpFl_se = []
	cpFl_latent_se = []
	cp_se = []
	cp_latent_se = []
	for i in range(1, 11):
		dat_t['latentmean_pt1'] = dat_t['latentmean_post_' + str(i) + '_L2']
		dat_t['cpFl_latent_pt'] = dat_t['latentmean_pt1'] * dat_t['cpFlL1']
		dat_t['cp_latent_pt'] = dat_t['latentmean_pt1'] * dat_t['cpL1']
		allVars = ['latentmean', 'ccode', 'cpL1', 'cpFlL1', 'latentmean_pt1', 'ccode', 'cpFl_latent_pt', 'cp_latent_pt'] + RHS_vars
		dat = dat_t[list(set(allVars))].dropna()
		tsts = dat['ccode'].astype(int)
		dat['ccode'] = pd.Categorical(tsts)
		results = smf.ols(form, data=dat).fit(cov_type='cluster', cov_kwds={'groups': dat['ccode']})

		cpFl_est.append(results.params['cpFlL1'])
		cpFl_latent_est.append(results.params['cpFl_latent_pt'])

		cp_est.append(results.params['cpL1'])
		cp_latent_est.append(results.params['cp_latent_pt'])

		cpFl_se.append(results.bse['cpFlL1'])
		cpFl_latent_se.append(results.bse['cpFl_latent_pt'])

		cp_se.append(results.bse['cpL1'])
		cp_latent_se.append(results.bse['cp_latent_pt'])

	q_cpFl = mean(cpFl_est)
	q_cpFl_latent = mean(cpFl_latent_est)

	q_cp = mean(cp_est)
	q_cp_latent = mean(cp_latent_est)

	smplVar_cpFl = sum([(x - q_cpFl)**2. for x in cpFl_est])/9.
	smplVar_cpFl_latent = sum([(x - q_cpFl_latent)**2. for x in cpFl_latent_est])/9.

	smplVar_cp = sum([(x - q_cp)**2. for x in cp_est])/9.
	smplVar_cp_latent = sum([(x - q_cp_latent)**2. for x in cp_latent_est])/9.

	avgVar_cpFl = mean([x**2. for x in cpFl_se])
	avgVar_cpFl_latent = mean([x**2. for x in cpFl_latent_se])

	avgVar_cp = mean([x**2. for x in cp_se])
	avgVar_cp_latent = mean([x**2. for x in cp_latent_se])

	SE_sqrd_cpFl = avgVar_cpFl + smplVar_cpFl*(1. + 1./10.)
	SE_sqrd_cpFl_latent = avgVar_cpFl_latent + smplVar_cpFl_latent*(1. + 1./10.)

	SE_sqrd_cp = avgVar_cp + smplVar_cp*(1. + 1./10.)
	SE_sqrd_cp_latent = avgVar_cp_latent + smplVar_cp_latent *(1. + 1./10.)

	return ({'Est': {'cpFlL1': q_cpFl,
						'cpL1': q_cp,
						'cpFl_latent': q_cpFl_latent,
						'cp_latent': q_cp_latent
					},
			'Se': {'cpFlL1': math.sqrt(SE_sqrd_cpFl),
						'cpL1': math.sqrt(SE_sqrd_cp),
						'cpFl_latent':math.sqrt(SE_sqrd_cpFl_latent),
						'cp_latent':math.sqrt(SE_sqrd_cp_latent),
						}
			}
		)



ebaVars = [
	 'e_migdppclnL2',
	 'e_migdpgroL2',
	 'ln_rexportspgdpL2',
	 'oil_binaryL2',
	 'ethfracL2',
	 'ln_tpopL2',
	 'tpopgroL2',
	 'cheibubDemoL2',
	 'cheibubPresidL2',
	 'cheibubMilL2',
	 'gwf_party_wDemoL2',
	 'gwf_monarch_wDemoL2',
	 'gwf_personal_wDemoL2',
	 'cheibubLegisL2',
	 'ln_milperpcL2',
	 'ln_milperL2',
	 'ln_rCOWmilexL2',
	 'ln_CowmilexpgdpL2',
	 'ln_rCOWmilexpcL2',
	 'ln_rCOWmilexpsoldL2',
	 'cbd_changeCowMilexL2',
	 'effectivenumberL2',
	 'paramilitary_bscL2',
	 'counterbal_pthL2',
	 'counterbalancingL2',
	 'warL2',
	 'CW',
	 'intra_warL2',
	 'prioL2',
	 'OppNonViolBanksL2',
	 'ncpL1',
	 'ncpFlL1',
	 'purgesL2',
	 'latentmeanL2',#,
	 'iccprL2', #iccpr signatory
	 'ComLaw', #common law
	 'ythblgapL2', #youth population
	 'xconstL2' #executive constraints (polity IV)
	 ]

working_dir = os.path.dirname(os.path.realpath(__file__))

keepvars = ebaVars + ['latentmean', 'cpFlL1', 'cpL1', 'ccode']
for i in range(1, 11):
	keepvars += ['latentmean_post_' + str(i) + '_L2']

dat_t = []

dat_t = pd.read_csv(working_dir + '/ts_bsc.csv').filter(items = keepvars)
dat_t['ccode'] = dat_t['ccode'].apply(str)

path_out = working_dir + '/eba.out.bscB/'
fldNms = ['ID', 'Coef', 'SE', 'tVal', 'pVal']

pt_levels = [-2., -1.5, -1., -.5, 0., .5, 1., 1.5, 2., 2.5, 3.]

for k in range(0, 7):
	print ('Starting regressions of length ' + str(k))

	j = 1
	opened_filesFl = {}
	opened_filesSc = {}
	for pt_lev in pt_levels:
		fileFl = open(path_out + 'FailedCoupsEBA_pt_'+ str(pt_lev) + '_' + str(k)  + '.csv', 'w')
		fileSc = open(path_out + 'SuccCoupsEBA_pt_'+ str(pt_lev) + '_' + str(k)  + '.csv', 'w')

		writerFl = csv.writer(fileFl, delimiter=",")
		writerFl.writerow(fldNms)
		writerSc = csv.writer(fileSc, delimiter=",")
		writerSc.writerow(fldNms)

		opened_filesFl[str(pt_lev)] = writerFl
		opened_filesSc[str(pt_lev)] = writerSc

	fileId = open(path_out + 'ID_Refs_' + str(k) + '.csv', 'w')
	writerId = csv.writer(fileId, delimiter=",")
	writerId.writerow(['ID', 'Formula'])

	cmbnations = list(itertools.combinations(ebaVars, k))
	last_round = -9
	for i in tqdm(range(len(cmbnations)), desc = 'Progress for length ' + str(k), unit_divisor=1000):
	#for i in range(len(cmbnations)):
		the_vars = cmbnations[i]
		ID = 'K_' + str(k) + "_" + str(i+1)
		RHS_vars = [x for x in the_vars]

		EstFl = None
		EstFlLatent = None
		SeFl = None
		SeFlLatent = None

		EstSc = None
		EstScLatent = None
		SeSc = None
		SeScLatent = None

		try:
			rslts = ols_cls_interac_MI(dat_t, RHS_vars)
		except:
			print('OLS did not work for variables ' + ', '.join(RHS_vars))

		try:
			EstFl = rslts['Est']['cpFlL1']
			EstFlLatent = rslts['Est']['cpFl_latent']

			SeFl = rslts['Se']['cpFlL1']
			SeFlLatent = rslts['Se']['cpFl_latent']

		except:
			pass


		try:
			EstSc = rslts['Est']['cpL1']
			EstScLatent = rslts['Est']['cp_latent']

			SeSc = rslts['Se']['cpL1']
			SeScLatent = rslts['Se']['cp_latent']
		except:
			pass

		for pt_lev in pt_levels:

			try:
				treat_Fl = EstFl + pt_lev*EstFlLatent
			except:
				treat_Fl = None
			try:
				treat_Sc = EstSc + pt_lev*EstScLatent
			except:
				treat_Sc = None

			try:
				Se_Fl = math.sqrt(SeFl**2. + pt_lev**2.*SeFlLatent**2.)
			except:
				Se_Fl = None
			try:
				Se_Sc = math.sqrt(SeSc**2. + pt_lev**2.*SeScLatent**2.)
			except:
				Se_Sc = None

			try:
				tValFl = float(treat_Fl)/float(Se_Fl)
			except:
				tValFl = None

			try:
				tValSc = float(treat_Sc)/float(Se_Sc)
			except:
				tValSc = None

			try:
				pValFl = 2.*(1.-norm.cdf(abs(tValFl)))
			except:
				pValFl = None

			try:
				pValSc = 2.*(1.-norm.cdf(abs(tValSc)))
			except:
				pValSc = None


			row_cpFl = [ID,
					treat_Fl,
					Se_Fl,
					tValFl,
					pValFl
					]

			row_cp = [ID,
					treat_Sc,
					Se_Sc,
					tValSc,
					pValSc
					]


			if last_round != int(k) and last_round != -9:
				opened_filesFl[str(pt_lev)].close()
				opened_filesSc[str(pt_lev)].close()

				fileFl = open(path_out + 'FailedCoupsEBA_pt_'+ str(pt_lev) + '_' + str(k) + '.csv', 'w')
				fileSc = open(path_out + 'SuccCoupsEBA_pt_'+ str(pt_lev) + '_' + str(k) + '.csv', 'w')

				writerFl = csv.writer(fileFl, delimiter=",")
				writerFl.writerow(fldNms)
				writerSc = csv.writer(fileSc, delimiter=",")
				writerSc.writerow(fldNms)

				opened_filesFl[str(pt_lev)] = writerFl
				opened_filesSc[str(pt_lev)] = writerSc

			opened_filesFl[str(pt_lev)].writerow(row_cpFl)
			opened_filesSc[str(pt_lev)].writerow(row_cp)

		if last_round != k and last_round != -9:
				fileId.close()
				fileId = open(path_out + 'ID_Refs_' + str(k) + '.csv', 'w')
				writerId = csv.writer(fileId, delimiter=",")
				writerId.writerow(['ID', 'Formula'])

		writerId.writerow([ID, 'latentmean ~ cpFlL1 + cpL1 + cpFl_latent_pt + cp_latent_pt + latentmean_pt1 +' + ' + '.join(RHS_vars)])
	last_round = int(k)



